source('codes/00_import_libraries.R')

# base: current 14 topics as submitted to RFS
# referee: 15 topics with Pandemic added more words and Natural Disaster added
# extended: more seed words for each topics


#------------------------------------------------------------------------
# Specifications
#------------------------------------------------------------------------

dict_name       = 'base'

narrative_dicts = readRDS('input/narrative_dicts_v2.rds')
topic_list      = narrative_dicts[[dict_name]] %>% names()

PLS1            = 'PLS_All_recursive'
PLS2            = 'PLS12_recursive'
x_list          = c(topic_list, PLS1, PLS2)

h               = 1
yvar            = 'mkt'

year_grid       = data.frame(
  beg           = c(1871, 1871, 1950, 2000),
  end           = c(2019, 1949, 2019, 2019)
)

period_list = paste0(
  year_grid$beg, '-', year_grid$end
)



#------------------------------------------------------------------------
# Data
#------------------------------------------------------------------------

dict_name = 'base'
dt = readRDS('input/combined_data.rds')[[dict_name]] %>%
  mutate(
    year = year(ym),
    across(all_of(paste0(yvar, h)), ~ .*12),
  ) %>% 
  drop_na(mkt1) %>% 
  select(ym, year, contains(yvar), any_of(x_list))



#------------------------------------------------------------------------
# Functions
#------------------------------------------------------------------------

ols_res = function(xvar, yvar, h, yr_range) {
  # this function takes in the xvar and year and fit ols to all returns
  
  # fit an ols model to all returns 
  mod_list = lapply(paste0(yvar, h),
    fn_fit_ols,
    xvar = xvar,
    df   = dt %>% filter(year %in% yr_range)
  )
  
  # compute newey west se
  mod_robust = mapply(
    function(mod, h){coeftest(mod, vcov.=NeweyWest(mod, lag=h, prewhite=FALSE))},
    mod_list,
    h,
    SIMPLIFY = FALSE
  )
  
  # return the adjusted r2
  is_r2 = lapply(mod_list, function(mod) glance(mod)$adj.r.squared * 100) %>%
    unlist()

  # create huxtale from regression
  out = huxreg(
    mod_robust,
    coefs        = xvar,
    error_format = "({statistic})",
    stars        = c(`***` = 0.01, `**` = 0.05, `*` = 0.1),
    statistics   = c(0),
    borders      = 0,
    outer_borders= 0,
    note         = NULL
  )
  
  # add the r2 to the huxtable
  out = out[-1,-1] %>% rbind(is_r2)
  
  return(out)
}



#------------------------------------------------------------------------
# Results
#------------------------------------------------------------------------

n = length(topic_list) + 2
n = length(x_list)
row_borders = seq(3, n*3, by = 3)

row_names = data.frame(
  x  = c(topic_list %>% str_replace_all('_', ' ') %>% tools::toTitleCase(), 
         'PLS', 'Shiller PLS'),
  t  = rep('$t$-stat', n),
  r2 = rep('$R^2$', n)
  # na = rep('', n)
) %>% 
  t() %>% 
  c()


output = lapply(1 : nrow(year_grid), function(i) {
  lapply(
    x_list,
    ols_res,
    yvar       = yvar,
    h          = 1,
    yr_range   = year_grid$beg[i]:year_grid$end[i]
  ) %>%
    do.call(what = rbind)
}) %>%
  do.call(what = cbind) %>%
  
  insert_column(row_names) %>% 
  set_bottom_border(row_borders, everywhere, 0.8) %>% 
  insert_row(c('', period_list)) %>% 
  set_number_format(1, everywhere, 0) %>% 
  set_number_format(-1, -1, 2) %>% 
  set_tb_borders(1, everywhere, 0.8) %>% 
  set_all_padding(0) %>% 
  set_escape_contents(F)



#------------------------------------------------------------------------
# Export results
#------------------------------------------------------------------------

quick_xlsx(output, file = glue('output/tables/Table_3.xlsx'))
# cat(to_latex(output, tabular_only = TRUE), file = glue('output/tables/Table_3.tex'))





